Load libraries

library(miloR)
library(Seurat)
library(SingleCellExperiment)
library(scater)
library(scuttle)
library(scran)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(clusterProfiler)
library(msigdbr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(ggpubr)
theme_set(theme_bw())

Load objects

We will simply read the files we created with the combined seurat objects from human and mouse cells.

hg19_scRNA <- readRDS("data/R_objects/hg19_RNA.rds")
mm10_scRNA <- readRDS("data/R_objects/mm10_RNA.rds")

Add metadata

We will now add metadata columns indicating the subtype of breast cancer and whether the tumor is resistant or sensitive to chemotherapy

hg19_scRNA@meta.data <- hg19_scRNA@meta.data %>%
  mutate(subtype=case_when(
    orig.ident=="HBCx-95"| orig.ident=="HBCx-95_CAPAR"~"Triple_negative",
    orig.ident=="HBCx-22"| orig.ident=="HBCx22-TAMR"~"Luminal")) %>% 
  mutate(drug_resistance=case_when(
    grepl("R", orig.ident)==T~"Resistant",
    grepl("R", orig.ident)==F~"Sensitive"),
    sample=rownames(.))

mm10_scRNA@meta.data <- mm10_scRNA@meta.data %>%
  mutate(subtype=case_when(
    orig.ident=="HBCx-95"| orig.ident=="HBCx-95_CAPAR"~"Triple_negative",
    orig.ident=="HBCx-22"| orig.ident=="HBCx22-TAMR"~"Luminal")) %>% 
  mutate(drug_resistance=case_when(
    grepl("R", orig.ident)==T~"Resistant",
    grepl("R", orig.ident)==F~"Sensitive"),
    sample=rownames(.))

Create Milo object

# Create object that can store the KNN graph as a reduced dimension
hg19_scRNA_milo <- as.SingleCellExperiment(hg19_scRNA)
hg19_scRNA_milo <- Milo(hg19_scRNA_milo)

# Same for mouse
mm10_scRNA_milo <- as.SingleCellExperiment(mm10_scRNA)
mm10_scRNA_milo <- Milo(mm10_scRNA_milo)

Construct KNN graph

hg19_scRNA_milo <- buildGraph(hg19_scRNA_milo, k=20, d=30)
## Constructing kNN graph with k:20
mm10_scRNA_milo <- buildGraph(mm10_scRNA_milo, k=30, d=30)
## Constructing kNN graph with k:30

Define neighbourhoods

hg19_scRNA_milo <- makeNhoods(hg19_scRNA_milo, prop = 0.2, k = 20, d=30, refined = TRUE)
## Checking valid object
## Running refined sampling with reduced_dim
plotNhoodSizeHist(hg19_scRNA_milo)

mm10_scRNA_milo <- makeNhoods(mm10_scRNA_milo, prop = 0.2, k = 30, d=30, refined = TRUE)
## Checking valid object
## Running refined sampling with reduced_dim
plotNhoodSizeHist(mm10_scRNA_milo)

Note that we have selected k for constructing the graph and defining neighbours to achieve a distribution peak between 50 and 100

Count the number of cells in neighbourhoods

hg19_scRNA_milo <- countCells(hg19_scRNA_milo, meta.data = data.frame(colData(hg19_scRNA_milo)), sample="orig.ident")
## Checking meta.data validity
## Counting cells in neighbourhoods
mm10_scRNA_milo <- countCells(mm10_scRNA_milo, meta.data = data.frame(colData(mm10_scRNA_milo)), sample="orig.ident")
## Checking meta.data validity
## Counting cells in neighbourhoods

Differential abundance testing

In this step we will perform a statistical test to find populations that are significantly more or less abundant in one group in comparison to another.

# Create design matrix
design_hg19 <- data.frame(colData(hg19_scRNA_milo))[,c("subtype", "drug_resistance","orig.ident")]
design_hg19 <- distinct(design_hg19)
rownames(design_hg19) <- design_hg19[,"orig.ident"]
design_hg19
##                       subtype drug_resistance    orig.ident
## HBCx-95       Triple_negative       Sensitive       HBCx-95
## HBCx-95_CAPAR Triple_negative       Resistant HBCx-95_CAPAR
## HBCx-22               Luminal       Sensitive       HBCx-22
## HBCx22-TAMR           Luminal       Resistant   HBCx22-TAMR
# Calculate distances
hg19_scRNA_milo <- calcNhoodDistance(hg19_scRNA_milo, d=30)
## as(<dgTMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "CsparseMatrix") instead
# Perform the statistical test
comparison_subtype <- testNhoods(hg19_scRNA_milo, design = ~ drug_resistance+subtype, design.df = design_hg19)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
comparison_resistance <- testNhoods(hg19_scRNA_milo, design = ~subtype + drug_resistance, design.df = design_hg19)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
# Create design matrix
design_mm10 <- data.frame(colData(mm10_scRNA_milo))[,c("subtype", "drug_resistance","orig.ident")]
design_mm10 <- distinct(design_mm10)
rownames(design_mm10) <- design_mm10[,"orig.ident"]
design_mm10
##                       subtype drug_resistance    orig.ident
## HBCx-95       Triple_negative       Sensitive       HBCx-95
## HBCx-95_CAPAR Triple_negative       Resistant HBCx-95_CAPAR
## HBCx-22               Luminal       Sensitive       HBCx-22
## HBCx22-TAMR           Luminal       Resistant   HBCx22-TAMR
# Calculate distances
mm10_scRNA_milo <- calcNhoodDistance(mm10_scRNA_milo, d=30)

# Perform the statistical test
comparison_subtype_mm10 <- testNhoods(mm10_scRNA_milo, design = ~ drug_resistance+subtype, design.df = design_mm10)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting
comparison_resistance_mm10 <- testNhoods(mm10_scRNA_milo, design = ~ subtype+drug_resistance, design.df = design_mm10)
## Using TMM normalisation
## Performing spatial FDR correction withk-distance weighting

Check that the test was balanced

F1_A <- ggplot(comparison_subtype, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by subtype")
F1_B <- ggplot(comparison_resistance, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by drug sensitivity")

ggarrange(F1_A, F1_B)

F4_A <-ggplot(comparison_subtype_mm10, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by subtype")
F4_B <-ggplot(comparison_resistance_mm10, aes(PValue)) + geom_histogram(bins=50) + ggtitle("P-value distribution for contrasts by drug sensitivity")
ggarrange(F4_A, F4_B)

Volcano plots of neighbourhoods

ggplot(comparison_subtype, aes(logFC, -log10(SpatialFDR))) +
  geom_point() +
  geom_hline(yintercept = 1)

ggplot(comparison_resistance, aes(logFC, -log10(SpatialFDR))) +
  geom_point() +
  geom_hline(yintercept = 1)

ggplot(comparison_subtype_mm10, aes(logFC, -log10(SpatialFDR))) +
  geom_point() +
  geom_hline(yintercept = 1)

ggplot(comparison_resistance_mm10, aes(logFC, -log10(SpatialFDR))) +
  geom_point() +
  geom_hline(yintercept = 1)

hg19_scRNA_milo <- buildNhoodGraph(hg19_scRNA_milo)

## Plot single-cell UMAP
umap_hg19 <- plotReducedDim(hg19_scRNA_milo, dimred = "UMAP", colour_by="cluster_name", text_by = "cluster_name", text_size = 8) + NoLegend()
## Plot neighbourhood graph
nh_graph_hg19 <- plotNhoodGraphDA(hg19_scRNA_milo, comparison_subtype, layout="UMAP",alpha=0.05)
nh_graph_resistance_hg19 <- plotNhoodGraphDA(hg19_scRNA_milo, comparison_resistance, layout="UMAP",alpha=0.05)
comparison_subtype <- annotateNhoods(hg19_scRNA_milo, comparison_subtype, coldata_col = "cluster_name")
head(comparison_subtype)
##        logFC   logCPM          F     PValue       FDR Nhood SpatialFDR
## 1 -0.9888326 11.87918 0.55462155 0.45705563 0.9655209     1  0.9677191
## 2 -0.4101012 12.60643 0.43488617 0.51013924 0.9784163     2  0.9755292
## 3  0.3058800 11.90601 0.06314886 0.80176955 0.9784163     3  0.9755292
## 4  2.3094264 11.81942 3.41015312 0.06584408 0.4220007     4  0.4388231
## 5 -0.4162361 12.12443 0.14770026 0.70103247 0.9784163     5  0.9755292
## 6  0.2553400 12.04828 0.04829574 0.82621480 0.9784163     6  0.9755292
##       cluster_name cluster_name_fraction
## 1 Helper-T-Cells 1             0.8630137
## 2 Helper-T-Cells 1             1.0000000
## 3      Fibroblasts             1.0000000
## 4      Fibroblasts             0.9850746
## 5      Fibroblasts             0.9701493
## 6         NK cells             0.9367089
beeswarm_hg19 <- plotDAbeeswarm(comparison_subtype, group.by = "cluster_name") + theme(text = element_text(size = 15)) 
## Converting group.by to factor...
comparison_resistance <- annotateNhoods(hg19_scRNA_milo, comparison_resistance, coldata_col = "cluster_name")
head(comparison_resistance)
##        logFC   logCPM          F    PValue       FDR Nhood SpatialFDR
## 1 -0.5388183 11.87918 0.16685978 0.6832278 0.9986069     1  0.9986069
## 2 -0.2814023 12.60643 0.20513737 0.6509540 0.9986069     2  0.9986069
## 3 -0.2723636 11.90601 0.05018627 0.8229017 0.9986069     3  0.9986069
## 4  0.7341273 11.81942 0.36115435 0.5483486 0.9986069     4  0.9986069
## 5  0.1369383 12.12443 0.01623871 0.8986899 0.9986069     5  0.9986069
## 6  0.1704957 12.04828 0.02157892 0.8833178 0.9986069     6  0.9986069
##       cluster_name cluster_name_fraction
## 1 Helper-T-Cells 1             0.8630137
## 2 Helper-T-Cells 1             1.0000000
## 3      Fibroblasts             1.0000000
## 4      Fibroblasts             0.9850746
## 5      Fibroblasts             0.9701493
## 6         NK cells             0.9367089
beeswarm_drug_r_hg19 <- plotDAbeeswarm(comparison_resistance, group.by = "cluster_name") + theme(text = element_text(size = 17))
## Converting group.by to factor...
ggarrange(umap_hg19,nh_graph_hg19,beeswarm_hg19, nrow=1, common.legend=F, legend="right")

ggarrange(umap_hg19,nh_graph_resistance_hg19, nrow=1, common.legend=F, legend="right")

Now we’ll do the same for mouse cells

mm10_scRNA_milo <- buildNhoodGraph(mm10_scRNA_milo)

## Plot single-cell UMAP
umap_mm10 <- plotReducedDim(mm10_scRNA_milo, dimred = "UMAP", colour_by="cluster_name", text_by = "cluster_name", text_size = 8) +
  guides(fill="none")

## Plot neighbourhood graph
nh_graph_mm10 <- plotNhoodGraphDA(mm10_scRNA_milo, comparison_subtype, layout="UMAP",alpha=0.05)
nh_graph_resistance_mm10 <- plotNhoodGraphDA(mm10_scRNA_milo, comparison_resistance, layout="UMAP",alpha=0.05)
comparison_subtype_mm10 <- annotateNhoods(mm10_scRNA_milo, comparison_subtype_mm10, coldata_col = "cluster_name")
head(comparison_subtype_mm10)
##        logFC   logCPM          F      PValue       FDR Nhood SpatialFDR
## 1 -1.3409163 10.79332 2.02919644 0.155017519 0.6292723     1  0.6299447
## 2  2.9603631 10.91700 7.39373190 0.006870605 0.2304295     2  0.2610097
## 3  1.8937498 11.11692 3.90443508 0.048788812 0.4653760     3  0.4707781
## 4  1.2934908 11.60930 1.65334336 0.199186966 0.6789249     4  0.6759204
## 5  0.1090573 11.79476 0.01461196 0.903841811 0.9605876     5  0.9517545
## 6  0.8010234 11.78570 0.78005982 0.377609564 0.7622119     6  0.7353875
##        cluster_name cluster_name_fraction
## 1     Fibroblasts 1             0.9782609
## 2     Macrophages 1             0.9545455
## 3     Macrophages 1             0.9629630
## 4     Macrophages 1             1.0000000
## 5 Endothelial cells             1.0000000
## 6     Fibroblasts 4             1.0000000
beeswarm_mm10 <- plotDAbeeswarm(comparison_subtype_mm10, group.by = "cluster_name") + theme(text = element_text(size = 15)) 
## Converting group.by to factor...
comparison_resistance_mm10 <- annotateNhoods(mm10_scRNA_milo, comparison_resistance_mm10, coldata_col = "cluster_name")
head(comparison_resistance_mm10)
##        logFC   logCPM         F     PValue       FDR Nhood SpatialFDR
## 1  1.9347880 10.79332 4.1911020 0.04123552 0.6420960     1  0.6804031
## 2 -1.6163868 10.91700 2.3002897 0.13025045 0.6928377     2  0.7046129
## 3 -0.6384238 11.11692 0.4578582 0.49898545 0.8131859     3  0.8163237
## 4 -1.7584227 11.60930 2.9733330 0.08535607 0.6928377     4  0.7046129
## 5  1.0969756 11.79476 1.4900876 0.22286261 0.6928377     5  0.7091971
## 6 -0.8465461 11.78570 0.8726003 0.35075348 0.7682516     6  0.7814215
##        cluster_name cluster_name_fraction
## 1     Fibroblasts 1             0.9782609
## 2     Macrophages 1             0.9545455
## 3     Macrophages 1             0.9629630
## 4     Macrophages 1             1.0000000
## 5 Endothelial cells             1.0000000
## 6     Fibroblasts 4             1.0000000
beeswarm_drug_r_mm10 <- plotDAbeeswarm(comparison_resistance_mm10, group.by = "cluster_name") + theme(text = element_text(size = 17))
## Converting group.by to factor...
ggarrange(umap_mm10,nh_graph_mm10,beeswarm_mm10, nrow=1, common.legend=F, legend="right")

ggarrange(umap_mm10,nh_graph_resistance_mm10, nrow=1, common.legend=F, legend="right")

Differential expression between cell populations

We will create a new object because this pseudobulk methodoloy requires raw counts rather than normalized, scaled ones.

Human cells

metadata_hg19 <- hg19_scRNA@meta.data
metadata_hg19$cluster_name <- factor(hg19_scRNA@active.ident)

hg19_scRNA_pseudo <- SingleCellExperiment(assays=list(counts=hg19_scRNA@assays$RNA@counts),
                                        colData=metadata_hg19)


summed_counts_hg19 <- aggregateAcrossCells(hg19_scRNA_pseudo, 
    ids=colData(hg19_scRNA_pseudo)[,c("cluster_name","drug_resistance","subtype")], use.assay.type="counts")
summed_counts_hg19
## class: SingleCellExperiment 
## dim: 32738 20 
## metadata(0):
## assays(1): counts
## rownames(32738): MIR1302-10 FAM138A ... AC002321.2 AC002321.1
## rowData names(0):
## colnames: NULL
## colData names(15): orig.ident nCount_RNA ... subtype ncells
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Remove combinations with few cells
summed_counts_filt_hg19 <- summed_counts_hg19[,summed_counts_hg19$ncells>=10]

# We will use this wrapper function from the scatter package, that combines all steps required for differential expression analysis with edgeR
DE_subtype_hg19 <- pseudoBulkDGE(summed_counts_filt_hg19, 
    label=summed_counts_filt_hg19$cluster_name,
    design=~drug_resistance+subtype, # Effect of subtype, accounting for drug resistance
    coef="subtypeTriple_negative",
    condition=summed_counts_filt_hg19$subtype 
)

DE_subtype_hg19_genes <- list()

for (i in 1:length(DE_subtype_hg19)){
  filtered_df <- DE_subtype_hg19[[i]] %>% 
    as.data.frame() %>% 
    #dplyr::filter(abs(logFC)>1 & FDR<0.01) %>% 
    mutate(cluster_name=names(DE_subtype_hg19)[i],
           gene=rownames(.))
  DE_subtype_hg19_genes[[i]] <- filtered_df
}

# Same for drug resistance effect

DE_drug_r_hg19 <- pseudoBulkDGE(summed_counts_filt_hg19, 
    label=summed_counts_filt_hg19$cluster_name,
    design=~subtype + drug_resistance, # Effect drug resistance, accounting for subtype
    coef="drug_resistanceSensitive",
    condition=summed_counts_filt_hg19$drug_resistance
)

DE_drug_r_hg19_genes <- list()

for (i in 1:length(DE_drug_r_hg19)){
  filtered_df <- DE_drug_r_hg19[[i]] %>% 
    as.data.frame() %>% 
    #dplyr::filter(abs(logFC)>0.5 & FDR<0.05) %>% 
    mutate(cluster_name=names(DE_drug_r_hg19)[i],
           gene=rownames(.))
  DE_drug_r_hg19_genes[[i]] <- filtered_df
}
volcano_colors <- c("cornflowerblue", "darkred","grey")
names(volcano_colors) <- c("DOWN", "UP", "NO")


plots_subtype_hg19 <- list()

for (i in 1:length(DE_subtype_hg19_genes)){
  df = DE_subtype_hg19_genes[[i]]
  df[is.na(df)] <- 0
  df$diffexpressed <- "NO"
  df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
  df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
  p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) + 
    geom_point() + 
    theme_minimal() + 
    scale_color_manual(values=volcano_colors) +
    ggtitle(paste0(unique(df$cluster_name))) +
    ylim(0,50)
  plots_subtype_hg19[[i]] <-p
}

legend_volcano <- get_legend(plots_subtype_hg19[[1]])

ggarrange(plotlist=plots_subtype_hg19, legend.grob = legend_volcano, nrow = 1, legend="right")

plots_resistance_hg19 <- list()

for (i in 1:length(DE_drug_r_hg19_genes)){
  df = DE_drug_r_hg19_genes[[i]]
  df[is.na(df)] <- 0
  df$diffexpressed <- "NO"
  df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
  df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
  p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) + 
    geom_point() + 
    theme_minimal() + 
    scale_color_manual(values=volcano_colors) +
    ggtitle(paste0(unique(df$cluster_name))) +
    ylim(0,10)
  plots_resistance_hg19[[i]] <-p
}

ggarrange(plotlist=plots_resistance_hg19, legend.grob = legend_volcano, nrow = 1, legend="right")

Let’s perform a functional enrichment analysis to see the functions these genes participate in:

Gene ontology

plot_GO <- function(df, db){
  df <- df %>% dplyr::filter(abs(logFC)>1 & FDR<0.01)
  # Calculate enrichment
  enrichment <- enrichGO(gene = df$gene,
             OrgDb  = db,
             keyType = "SYMBOL",
             ont  = "BP",
             pAdjustMethod = "BH",
             pvalueCutoff  = 0.01,
             qvalueCutoff  = 0.05)
  enrichment <- clusterProfiler::simplify(enrichment, cutoff=0.7, by="p.adjust", select_fun=min)
  
  GO_ggdata <- enrichment %>%
   as_data_frame() %>%
   arrange(Count)
  GO_ggdata$Description <- factor(GO_ggdata$Description, levels = GO_ggdata$Description)
  print(tail(GO_ggdata, n=20L))
  
ggplot(GO_ggdata, aes(x = Description, y = Count, fill = p.adjust)) +
 geom_bar(stat = "identity") +
 scale_colour_viridis_d(begin=0,end=1) +
 coord_flip() +
 ylab("Number of genes") +
 xlab("GO Terms") +
 theme(axis.text.y = element_text(size=10)) +
  ggtitle(paste0("GO enrichment in", unique(df$cluster_name)))
}

# lapply(DE_drug_r_hg19_genes, function(x){plot_GO(df=x, db="org.Hs.eg.db")}) 
# There is a much smaller number of DE so the enrichment is non significant
lapply(DE_subtype_hg19_genes, function(x){plot_GO(df=x, db="org.Hs.eg.db")})
## Warning: `as_data_frame()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## # A tibble: 20 Ă— 9
##    ID         Descript…¹ GeneR…² BgRatio   pvalue p.adjust   qvalue geneID Count
##    <chr>      <fct>      <chr>   <chr>      <dbl>    <dbl>    <dbl> <chr>  <int>
##  1 GO:0044089 positive … 40/820  498/18… 1.41e- 4 3.64e- 3 2.88e- 3 CLSTN…    40
##  2 GO:0040013 negative … 41/820  419/18… 1.08e- 6 8.53e- 5 6.73e- 5 IL24/…    41
##  3 GO:0051960 regulatio… 41/820  456/18… 8.87e- 6 4.25e- 4 3.36e- 4 CLSTN…    41
##  4 GO:0001503 ossificat… 42/820  429/18… 7.86e- 7 6.63e- 5 5.23e- 5 ID3/R…    42
##  5 GO:0052547 regulatio… 42/820  459/18… 4.47e- 6 2.51e- 4 1.98e- 4 CASP9…    42
##  6 GO:0050678 regulatio… 44/820  414/18… 4.14e- 8 7.21e- 6 5.69e- 6 ERRFI…    44
##  7 GO:0048732 gland dev… 44/820  441/18… 2.54e- 7 2.76e- 5 2.18e- 5 WLS/T…    44
##  8 GO:0010975 regulatio… 45/820  446/18… 1.34e- 7 1.80e- 5 1.42e- 5 EPHB2…    45
##  9 GO:0050878 regulatio… 46/820  390/18… 7.46e-10 3.90e- 7 3.08e- 7 EPHB2…    46
## 10 GO:0016049 cell grow… 46/820  493/18… 9.18e- 7 7.50e- 5 5.92e- 5 CLSTN…    46
## 11 GO:0030198 extracell… 48/820  318/18… 3.84e-14 7.22e-11 5.70e-11 VWA1/…    48
## 12 GO:0043062 extracell… 48/820  319/18… 4.34e-14 7.22e-11 5.70e-11 VWA1/…    48
## 13 GO:0045229 external … 48/820  321/18… 5.53e-14 7.22e-11 5.70e-11 VWA1/…    48
## 14 GO:0032102 negative … 48/820  436/18… 3.18e- 9 1.28e- 6 1.01e- 6 SMPDL…    48
## 15 GO:0031589 cell-subs… 49/820  369/18… 2.86e-12 2.37e- 9 1.87e- 9 LAMB3…    49
## 16 GO:0001655 urogenita… 51/820  360/18… 7.91e-14 8.27e-11 6.53e-11 CASP9…    51
## 17 GO:0050673 epithelia… 51/820  481/18… 3.70e- 9 1.38e- 6 1.09e- 6 ERRFI…    51
## 18 GO:0045785 positive … 51/820  484/18… 4.57e- 9 1.59e- 6 1.26e- 6 RUNX3…    51
## 19 GO:0001667 ameboidal… 55/820  492/18… 1.23e-10 8.07e- 8 6.37e- 8 S100A…    55
## 20 GO:0042060 wound hea… 64/820  442/18… 1.54e-17 8.03e-14 6.34e-14 EPHB2…    64
## # … with abbreviated variable names ¹​Description, ²​GeneRatio
## # A tibble: 20 Ă— 9
##    ID         Description   GeneR…¹ BgRatio  pvalue p.adj…²  qvalue geneID Count
##    <chr>      <fct>         <chr>   <chr>     <dbl>   <dbl>   <dbl> <chr>  <int>
##  1 GO:0045104 intermediate… 10/352  89/189… 6.09e-6 1.93e-3 1.59e-3 VIM/K…    10
##  2 GO:0045103 intermediate… 10/352  90/189… 6.74e-6 1.98e-3 1.63e-3 VIM/K…    10
##  3 GO:0048661 positive reg… 10/352  98/189… 1.45e-5 3.72e-3 3.06e-3 IGFBP…    10
##  4 GO:0042303 molting cycle 10/352  115/18… 5.82e-5 8.27e-3 6.81e-3 TGFB2…    10
##  5 GO:0042633 hair cycle    10/352  115/18… 5.82e-5 8.27e-3 6.81e-3 TGFB2…    10
##  6 GO:0007584 response to … 12/352  154/18… 3.23e-5 5.13e-3 4.22e-3 CYP1B…    12
##  7 GO:0002526 acute inflam… 13/352  114/18… 2.07e-7 2.66e-4 2.19e-4 S100A…    13
##  8 GO:0007568 aging         13/352  169/18… 1.75e-5 4.01e-3 3.30e-3 CASP9…    13
##  9 GO:0071695 anatomical s… 16/352  250/18… 1.97e-5 4.27e-3 3.51e-3 TGFB2…    16
## 10 GO:0021700 developmenta… 18/352  304/18… 1.73e-5 4.01e-3 3.30e-3 NFIA/…    18
## 11 GO:0070372 regulation o… 18/352  315/18… 2.77e-5 4.57e-3 3.76e-3 ERRFI…    18
## 12 GO:0032496 response to … 19/352  339/18… 2.18e-5 4.45e-3 3.66e-3 CASP9…    19
## 13 GO:0070371 ERK1 and ERK… 19/352  340/18… 2.27e-5 4.45e-3 3.66e-3 ERRFI…    19
## 14 GO:0043588 skin develop… 21/352  302/18… 2.58e-7 2.66e-4 2.19e-4 ERRFI…    21
## 15 GO:0001655 urogenital s… 22/352  360/18… 1.20e-6 9.00e-4 7.40e-4 CASP9…    22
## 16 GO:0032102 negative reg… 22/352  436/18… 2.52e-5 4.57e-3 3.76e-3 DUSP1…    22
## 17 GO:0042060 wound healing 23/352  442/18… 1.01e-5 2.77e-3 2.27e-3 SPRR3…    23
## 18 GO:0008544 epidermis de… 25/352  362/18… 2.09e-8 4.30e-5 3.54e-5 ERRFI…    25
## 19 GO:0050673 epithelial c… 26/352  481/18… 1.31e-6 9.00e-4 7.40e-4 ERRFI…    26
## 20 GO:0045785 positive reg… 31/352  484/18… 2.43e-9 1.00e-5 8.25e-6 TGFB2…    31
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
##    ID         Description   GeneR…¹ BgRatio  pvalue p.adj…²  qvalue geneID Count
##    <chr>      <fct>         <chr>   <chr>     <dbl>   <dbl>   <dbl> <chr>  <int>
##  1 GO:0032651 regulation o… 13/411  111/18… 8.77e-7 6.71e-4 5.72e-4 ERRFI…    13
##  2 GO:0022612 gland morpho… 13/411  123/18… 2.83e-6 1.22e-3 1.04e-3 IGFBP…    13
##  3 GO:0007584 response to … 14/411  154/18… 7.04e-6 2.01e-3 1.72e-3 CYP1B…    14
##  4 GO:0048469 cell maturat… 15/411  187/18… 1.52e-5 3.44e-3 2.93e-3 EPAS1…    15
##  5 GO:0007565 female pregn… 16/411  189/18… 3.99e-6 1.43e-3 1.22e-3 RGS2/…    16
##  6 GO:0044706 multi-multic… 16/411  218/18… 2.41e-5 4.07e-3 3.47e-3 RGS2/…    16
##  7 GO:0045444 fat cell dif… 17/411  244/18… 2.64e-5 4.07e-3 3.47e-3 RGS2/…    17
##  8 GO:0071695 anatomical s… 18/411  250/18… 9.84e-6 2.48e-3 2.12e-3 EPAS1…    18
##  9 GO:0043588 skin develop… 19/411  302/18… 3.71e-5 5.08e-3 4.34e-3 ERRFI…    19
## 10 GO:0070372 regulation o… 20/411  315/18… 2.04e-5 3.80e-3 3.24e-3 ERRFI…    20
## 11 GO:0001933 negative reg… 20/411  338/18… 5.52e-5 6.23e-3 5.31e-3 ERRFI…    20
## 12 GO:0032496 response to … 21/411  339/18… 1.84e-5 3.80e-3 3.24e-3 PDE4B…    21
## 13 GO:0070371 ERK1 and ERK… 21/411  340/18… 1.92e-5 3.80e-3 3.24e-3 ERRFI…    21
## 14 GO:0021700 developmenta… 22/411  304/18… 9.39e-7 6.71e-4 5.72e-4 CLSTN…    22
## 15 GO:0042326 negative reg… 22/411  381/18… 3.42e-5 5.06e-3 4.31e-3 ERRFI…    22
## 16 GO:0050727 regulation o… 23/411  414/18… 4.15e-5 5.08e-3 4.34e-3 CAMK2…    23
## 17 GO:0008544 epidermis de… 24/411  362/18… 1.44e-6 8.13e-4 6.93e-4 ERRFI…    24
## 18 GO:0042060 wound healing 24/411  442/18… 4.08e-5 5.08e-3 4.34e-3 S100A…    24
## 19 GO:2001233 regulation o… 27/411  374/18… 5.66e-8 2.43e-4 2.07e-4 S100A…    27
## 20 GO:0045785 positive reg… 29/411  484/18… 9.31e-7 6.71e-4 5.72e-4 DUSP1…    29
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
##    ID         Description  GeneR…¹ BgRatio   pvalue p.adj…²  qvalue geneID Count
##    <chr>      <fct>        <chr>   <chr>      <dbl>   <dbl>   <dbl> <chr>  <int>
##  1 GO:0062012 regulation … 27/701  339/18… 1.71e- 4 9.45e-3 7.93e-3 ARNT/…    27
##  2 GO:0002181 cytoplasmic… 29/701  161/18… 1.37e-12 6.95e-9 5.83e-9 RPS8/…    29
##  3 GO:0032496 response to… 29/701  339/18… 2.76e- 5 2.91e-3 2.44e-3 PDE4B…    29
##  4 GO:0008544 epidermis d… 29/701  362/18… 8.98e- 5 5.99e-3 5.02e-3 ERRFI…    29
##  5 GO:0050727 regulation … 31/701  414/18… 1.78e- 4 9.69e-3 8.13e-3 S100A…    31
##  6 GO:0006631 fatty acid … 32/701  400/18… 4.05e- 5 3.80e-3 3.19e-3 SCP2/…    32
##  7 GO:0050678 regulation … 32/701  414/18… 7.81e- 5 5.42e-3 4.55e-3 ERRFI…    32
##  8 GO:0043434 response to… 32/701  421/18… 1.07e- 4 6.76e-3 5.67e-3 ERRFI…    32
##  9 GO:0048545 response to… 33/701  339/18… 4.64e- 7 1.96e-4 1.64e-4 ERRFI…    33
## 10 GO:0031589 cell-substr… 33/701  369/18… 3.05e- 6 9.65e-4 8.10e-4 LAMB3…    33
## 11 GO:0001666 response to… 34/701  296/18… 5.31e- 9 8.97e-6 7.52e-6 ARNT/…    34
## 12 GO:0007159 leukocyte c… 34/701  414/18… 1.36e- 5 2.16e-3 1.81e-3 TNFRS…    34
## 13 GO:0062197 cellular re… 35/701  345/18… 7.60e- 8 4.28e-5 3.59e-5 ERRFI…    35
## 14 GO:0052547 regulation … 35/701  459/18… 4.82e- 5 4.36e-3 3.66e-3 DHCR2…    35
## 15 GO:0045936 negative re… 36/701  440/18… 8.31e- 6 1.83e-3 1.53e-3 ERRFI…    36
## 16 GO:0050673 epithelial … 36/701  481/18… 5.55e- 5 4.77e-3 4.00e-3 ERRFI…    36
## 17 GO:0042060 wound heali… 37/701  442/18… 3.69e- 6 1.09e-3 9.13e-4 S100A…    37
## 18 GO:0001667 ameboidal-t… 37/701  492/18… 3.97e- 5 3.79e-3 3.18e-3 TGFB2…    37
## 19 GO:0006979 response to… 41/701  434/18… 4.37e- 8 3.16e-5 2.65e-5 DHCR2…    41
## 20 GO:0045785 positive re… 47/701  484/18… 1.82e- 9 4.60e-6 3.86e-6 TNFRS…    47
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## # A tibble: 20 Ă— 9
##    ID         Description   GeneR…¹ BgRatio  pvalue p.adj…²  qvalue geneID Count
##    <chr>      <fct>         <chr>   <chr>     <dbl>   <dbl>   <dbl> <chr>  <int>
##  1 GO:0050900 leukocyte mi… 42/1108 398/18… 1.66e-4 9.03e-3 7.56e-3 NBL1/…    42
##  2 GO:0030198 extracellula… 43/1108 318/18… 2.67e-7 1.35e-4 1.13e-4 ADAMT…    43
##  3 GO:1901214 regulation o… 43/1108 325/18… 4.89e-7 1.81e-4 1.51e-4 CASP9…    43
##  4 GO:0050878 regulation o… 43/1108 390/18… 5.13e-5 4.59e-3 3.84e-3 VAV3/…    43
##  5 GO:0070997 neuron death  44/1108 368/18… 5.60e-6 1.03e-3 8.66e-4 CASP9…    44
##  6 GO:0001558 regulation o… 44/1108 422/18… 1.54e-4 8.63e-3 7.23e-3 CLSTN…    44
##  7 GO:0045936 negative reg… 45/1108 440/18… 2.02e-4 9.89e-3 8.28e-3 ERRFI…    45
##  8 GO:0006631 fatty acid m… 46/1108 400/18… 9.70e-6 1.57e-3 1.31e-3 FABP3…    46
##  9 GO:0032102 negative reg… 46/1108 436/18… 8.33e-5 6.24e-3 5.23e-3 NBL1/…    46
## 10 GO:0008544 epidermis de… 47/1108 362/18… 2.55e-7 1.35e-4 1.13e-4 ERRFI…    47
## 11 GO:0001655 urogenital s… 48/1108 360/18… 8.43e-8 6.68e-5 5.59e-5 CASP9…    48
## 12 GO:0031589 cell-substra… 48/1108 369/18… 1.80e-7 1.25e-4 1.04e-4 LAMB3…    48
## 13 GO:0001503 ossification  48/1108 429/18… 1.30e-5 1.81e-3 1.51e-3 ID3/R…    48
## 14 GO:0052547 regulation o… 49/1108 459/18… 3.67e-5 3.55e-3 2.97e-3 CASP9…    49
## 15 GO:0016049 cell growth   50/1108 493/18… 1.13e-4 7.79e-3 6.52e-3 CLSTN…    50
## 16 GO:0050673 epithelial c… 51/1108 481/18… 3.07e-5 3.21e-3 2.69e-3 ERRFI…    51
## 17 GO:0006979 response to … 52/1108 434/18… 7.56e-7 2.33e-4 1.95e-4 DHCR2…    52
## 18 GO:0001667 ameboidal-ty… 52/1108 492/18… 2.80e-5 2.99e-3 2.50e-3 S100A…    52
## 19 GO:0042060 wound healing 56/1108 442/18… 4.31e-8 4.78e-5 4.00e-5 VAV3/…    56
## 20 GO:0045785 positive reg… 61/1108 484/18… 1.30e-8 3.23e-5 2.70e-5 RUNX3…    61
## # … with abbreviated variable names ¹​GeneRatio, ²​p.adjust
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Murine cells

metadata_mm10 <- mm10_scRNA@meta.data
metadata_mm10$cluster_name <- factor(mm10_scRNA@active.ident)

mm10_scRNA_pseudo <- SingleCellExperiment(assays=list(counts=mm10_scRNA@assays$RNA@counts),
                                        colData=metadata_mm10)


summed_counts_mm10 <- aggregateAcrossCells(mm10_scRNA_pseudo, 
    ids=colData(mm10_scRNA_pseudo)[,c("cluster_name","drug_resistance","subtype")], use.assay.type="counts")
summed_counts_mm10
## class: SingleCellExperiment 
## dim: 27998 55 
## metadata(0):
## assays(1): counts
## rownames(27998): Xkr4 Gm1992 ... Vmn2r122 CAAA01147332.1
## rowData names(0):
## colnames: NULL
## colData names(16): orig.ident nCount_RNA ... subtype ncells
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Remove combinations with few cells
summed_counts_filt_mm10 <- summed_counts_mm10[,summed_counts_mm10$ncells>=10]

# We will use this wrapper function from the scatter package, that combines all steps required for differential expression analysis with edgeR
DE_subtype_mm10 <- pseudoBulkDGE(summed_counts_filt_mm10, 
    label=summed_counts_filt_mm10$cluster_name,
    design=~drug_resistance+subtype, # Effect of subtype, accounting for drug resistance
    coef="subtypeTriple_negative",
    condition=summed_counts_filt_mm10$subtype 
)

DE_subtype_mm10_genes <- list()

for (i in 1:length(DE_subtype_mm10)){
  filtered_df <- DE_subtype_mm10[[i]] %>% 
    as.data.frame() %>% 
    #dplyr::filter(abs(logFC)>1 & FDR<0.01) %>% 
    mutate(cluster_name=names(DE_subtype_mm10)[i],
           gene=rownames(.))
  DE_subtype_mm10_genes[[i]] <- filtered_df
}

# Same for drug resistance effect

DE_drug_r_mm10 <- pseudoBulkDGE(summed_counts_filt_mm10, 
    label=summed_counts_filt_mm10$cluster_name,
    design=~subtype + drug_resistance, # Effect drug resistance, accounting for subtype
    coef="drug_resistanceSensitive",
    condition=summed_counts_filt_mm10$drug_resistance
)

DE_drug_r_mm10_genes <- list()

for (i in 1:length(DE_drug_r_mm10)){
  filtered_df <- DE_drug_r_mm10[[i]] %>% 
    as.data.frame() %>% 
    #dplyr::filter(abs(logFC)>0.5 & FDR<0.05) %>% 
    mutate(cluster_name=names(DE_drug_r_mm10)[i],
           gene=rownames(.))
  DE_drug_r_mm10_genes[[i]] <- filtered_df
}
plots_subtype_mm10 <- list()

for (i in 1:length(DE_subtype_mm10_genes)){
  df = DE_subtype_mm10_genes[[i]]
  df[is.na(df)] <- 0
  df$diffexpressed <- "NO"
  df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
  df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
  p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) + 
    geom_point() + 
    theme_minimal() + 
    scale_color_manual(values=volcano_colors) +
    ggtitle(paste0(unique(df$cluster_name))) +
    ylim(0,15)
  plots_subtype_mm10[[i]] <-p
}

ggarrange(plotlist=plots_subtype_mm10, legend.grob = legend_volcano, nrow = 1, legend="right")

plots_resistance_mm10 <- list()

for (i in 1:length(DE_drug_r_mm10_genes)){
  df = DE_drug_r_mm10_genes[[i]]
  df[is.na(df)] <- 0
  df$diffexpressed <- "NO"
  df$diffexpressed[df$logFC > 1 & df$FDR < 0.01] <- "UP"
  df$diffexpressed[df$logFC < -1 & df$FDR < 0.01] <- "DOWN"
  p <- ggplot(data=df, aes(x=logFC, y=-log10(FDR), col=diffexpressed)) + 
    geom_point() + 
    theme_minimal() + 
    scale_color_manual(values=volcano_colors) +
    ggtitle(paste0(unique(df$cluster_name))) +
    ylim(0,15)
  plots_resistance_mm10[[i]] <-p
}

ggarrange(plotlist=plots_resistance_mm10, legend.grob = legend_volcano, nrow = 1, legend="right")

Let’s perform a functional enrichment analysis to see the functions these genes participate in:

Gene ontology

lapply(DE_drug_r_mm10_genes, function(x){plot_GO(df=x, db="org.Mm.eg.db")})
lapply(DE_subtype_mm10_genes, function(x){plot_GO(df=x, db="org.Mm.eg.db")})

There is no significant enrichment of these genes.

Session info

sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Spain.utf8  LC_CTYPE=Spanish_Spain.utf8   
## [3] LC_MONETARY=Spanish_Spain.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Spain.utf8    
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggpubr_0.5.0                patchwork_1.1.2            
##  [3] dplyr_1.0.10                msigdbr_7.5.1              
##  [5] clusterProfiler_4.6.0       org.Mm.eg.db_3.16.0        
##  [7] org.Hs.eg.db_3.16.0         AnnotationDbi_1.60.0       
##  [9] scran_1.26.0                scater_1.26.1              
## [11] ggplot2_3.4.0               scuttle_1.8.1              
## [13] SingleCellExperiment_1.20.0 SummarizedExperiment_1.28.0
## [15] Biobase_2.58.0              GenomicRanges_1.50.1       
## [17] GenomeInfoDb_1.34.3         IRanges_2.32.0             
## [19] S4Vectors_0.36.0            BiocGenerics_0.44.0        
## [21] MatrixGenerics_1.10.0       matrixStats_0.63.0         
## [23] SeuratObject_4.1.3          Seurat_4.3.0               
## [25] miloR_1.6.0                 edgeR_3.40.0               
## [27] limma_3.54.0               
## 
## loaded via a namespace (and not attached):
##   [1] scattermore_0.8           tidyr_1.2.1              
##   [3] bit64_4.0.5               knitr_1.41               
##   [5] irlba_2.3.5.1             DelayedArray_0.24.0      
##   [7] data.table_1.14.6         KEGGREST_1.38.0          
##   [9] RCurl_1.98-1.9            generics_0.1.3           
##  [11] ScaledMatrix_1.6.0        cowplot_1.1.1            
##  [13] RSQLite_2.2.19            shadowtext_0.1.2         
##  [15] RANN_2.6.1                future_1.29.0            
##  [17] bit_4.0.5                 enrichplot_1.18.1        
##  [19] spatstat.data_3.0-0       httpuv_1.6.6             
##  [21] assertthat_0.2.1          viridis_0.6.2            
##  [23] xfun_0.35                 jquerylib_0.1.4          
##  [25] babelgene_22.9            evaluate_0.18            
##  [27] promises_1.2.0.1          fansi_1.0.3              
##  [29] igraph_1.3.5              DBI_1.1.3                
##  [31] htmlwidgets_1.5.4         spatstat.geom_3.0-3      
##  [33] purrr_0.3.5               ellipsis_0.3.2           
##  [35] backports_1.4.1           deldir_1.0-6             
##  [37] sparseMatrixStats_1.10.0  vctrs_0.5.1              
##  [39] ROCR_1.0-11               abind_1.4-5              
##  [41] cachem_1.0.6              withr_2.5.0              
##  [43] ggforce_0.4.1             HDO.db_0.99.1            
##  [45] progressr_0.11.0          sctransform_0.3.5        
##  [47] treeio_1.22.0             goftest_1.2-3            
##  [49] cluster_2.1.4             DOSE_3.24.2              
##  [51] ape_5.6-2                 lazyeval_0.2.2           
##  [53] crayon_1.5.2              spatstat.explore_3.0-5   
##  [55] labeling_0.4.2            pkgconfig_2.0.3          
##  [57] tweenr_2.0.2              nlme_3.1-160             
##  [59] vipor_0.4.5               rlang_1.0.6              
##  [61] globals_0.16.2            lifecycle_1.0.3          
##  [63] miniUI_0.1.1.1            downloader_0.4           
##  [65] rsvd_1.0.5                polyclip_1.10-4          
##  [67] lmtest_0.9-40             Matrix_1.5-1             
##  [69] aplot_0.1.9               carData_3.0-5            
##  [71] zoo_1.8-11                beeswarm_0.4.0           
##  [73] ggridges_0.5.4            png_0.1-7                
##  [75] viridisLite_0.4.1         bitops_1.0-7             
##  [77] gson_0.0.9                KernSmooth_2.23-20       
##  [79] Biostrings_2.66.0         blob_1.2.3               
##  [81] DelayedMatrixStats_1.20.0 stringr_1.4.1            
##  [83] qvalue_2.30.0             parallelly_1.32.1        
##  [85] spatstat.random_3.0-1     rstatix_0.7.1            
##  [87] gridGraphics_0.5-1        ggsignif_0.6.4           
##  [89] beachmat_2.14.0           scales_1.2.1             
##  [91] memoise_2.0.1             magrittr_2.0.3           
##  [93] plyr_1.8.8                ica_1.0-3                
##  [95] zlibbioc_1.44.0           compiler_4.2.2           
##  [97] scatterpie_0.1.8          dqrng_0.3.0              
##  [99] RColorBrewer_1.1-3        fitdistrplus_1.1-8       
## [101] cli_3.4.1                 XVector_0.38.0           
## [103] listenv_0.8.0             pbapply_1.6-0            
## [105] MASS_7.3-58.1             tidyselect_1.2.0         
## [107] stringi_1.7.8             highr_0.9                
## [109] yaml_2.3.6                GOSemSim_2.24.0          
## [111] BiocSingular_1.14.0       locfit_1.5-9.6           
## [113] ggrepel_0.9.2             grid_4.2.2               
## [115] sass_0.4.4                fastmatch_1.1-3          
## [117] tools_4.2.2               future.apply_1.10.0      
## [119] parallel_4.2.2            rstudioapi_0.14          
## [121] bluster_1.8.0             metapod_1.6.0            
## [123] gridExtra_2.3             farver_2.1.1             
## [125] Rtsne_0.16                ggraph_2.1.0             
## [127] digest_0.6.30             shiny_1.7.3              
## [129] Rcpp_1.0.9                car_3.1-1                
## [131] broom_1.0.1               later_1.3.0              
## [133] RcppAnnoy_0.0.20          httr_1.4.4               
## [135] colorspace_2.0-3          tensor_1.5               
## [137] reticulate_1.26           splines_4.2.2            
## [139] uwot_0.1.14               yulab.utils_0.0.5        
## [141] statmod_1.4.37            tidytree_0.4.1           
## [143] spatstat.utils_3.0-1      graphlayouts_0.8.4       
## [145] sp_1.5-1                  ggplotify_0.1.0          
## [147] plotly_4.10.1             xtable_1.8-4             
## [149] jsonlite_1.8.3            ggtree_3.6.2             
## [151] tidygraph_1.2.2           ggfun_0.0.9              
## [153] R6_2.5.1                  pillar_1.8.1             
## [155] htmltools_0.5.3           mime_0.12                
## [157] glue_1.6.2                fastmap_1.1.0            
## [159] BiocParallel_1.32.1       BiocNeighbors_1.16.0     
## [161] codetools_0.2-18          fgsea_1.24.0             
## [163] utf8_1.2.2                lattice_0.20-45          
## [165] bslib_0.4.1               spatstat.sparse_3.0-0    
## [167] tibble_3.1.8              ggbeeswarm_0.6.0         
## [169] leiden_0.4.3              gtools_3.9.3             
## [171] GO.db_3.16.0              survival_3.4-0           
## [173] rmarkdown_2.18            munsell_0.5.0            
## [175] GenomeInfoDbData_1.2.9    reshape2_1.4.4           
## [177] gtable_0.3.1